R script to analyse the data on virome sequencing of single Belgian mosquitoes.

necessary_packages <- c('tidyverse', 'data.table', 'scales', 'knitr', 'grid', 'magrittr', 'DT', 'here', 'gridtext',
                        'ggpubr', 'ggthemes', 'ggpmisc', 'reshape2', 'viridis', 'pals', 'circlize',
                        'vegan', 'phyloseq', 'metagenomeSeq', 'ComplexHeatmap', 'decontam')
lapply(necessary_packages, library, character.only = TRUE)
i_am("BelgianMosquitoVirome.R")
#setwd(here())
#setwd("/Users/lander/OneDrive - KU Leuven/Documents/Manuscripts/Belgian mosquitoes (2022)/BMV-analysis/")
source("BMV_functions.R")
## [1] "R version 4.1.1 (2021-08-10)"
## [1] "Running under: macOS Big Sur 10.16"
##  [1] "Biobase 2.52.0"       "BiocGenerics 0.38.0"  "circlize 0.4.13"     
##  [4] "ComplexHeatmap 2.8.0" "data.table 1.14.2"    "decontam 1.12.0"     
##  [7] "dplyr 1.0.7"          "DT 0.20"              "forcats 0.5.1"       
## [10] "ggplot2 3.3.5"        "ggpmisc 0.4.5"        "ggpp 0.4.3"          
## [13] "ggpubr 0.4.0"         "ggthemes 4.2.4"       "glmnet 4.1-3"        
## [16] "gridtext 0.1.4"       "here 1.0.1"           "knitr 1.37"          
## [19] "lattice 0.20-45"      "limma 3.48.3"         "magrittr 2.0.1"      
## [22] "Matrix 1.4-0"         "metagenomeSeq 1.34.0" "pals 1.7"            
## [25] "permute 0.9-5"        "phyloseq 1.36.0"      "purrr 0.3.4"         
## [28] "RColorBrewer 1.1-2"   "readr 2.1.1"          "reshape2 1.4.4"      
## [31] "scales 1.1.1"         "stringr 1.4.0"        "tibble 3.1.6"        
## [34] "tidyr 1.1.4"          "tidyverse 1.3.1"      "vegan 2.5-7"         
## [37] "viridis 0.6.2"        "viridisLite 0.4.0"

1 Eukaryotic virome analysis

1.1 Prepare the data

1.1.1 Load the OTU table, taxonomy file and metadata into R

OTU <- read.table("data/abundance-table.txt", header=TRUE, row.names=1, sep="\t", dec=".")
names(OTU) <- gsub(x = names(OTU), pattern = "\\.", replacement = "-")
summary(rowSums(OTU))
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0      33      69    1488     235 4549919
summary(colSums(OTU))
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    2522  107065  214236  468777  649110 3995008
tax <- read.table("data/BEmosq_classification-1000nt.tsv", header=TRUE, row.names=1, sep="\t", dec=".")
meta <- read.table("data/BEmosq_metadata.csv", header=TRUE, row.names = 1, sep=";", dec=".")
meta <- cbind(Sample=rownames(meta), meta)

1.1.2 Make a phyloseq object

OTU.UF <- otu_table(as.matrix(OTU), taxa_are_rows=T)
tax.UF <- tax_table(as.matrix(tax))
meta.UF <- sample_data(meta)

BMV <- phyloseq(OTU.UF, tax.UF, meta.UF)
BMV
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 4040 taxa and 207 samples ]
## sample_data() Sample Data:       [ 207 samples by 10 sample variables ]
## tax_table()   Taxonomy Table:    [ 4040 taxa by 7 taxonomic ranks ]

1.1.3 Remove contamination

1.1.3.1 Visualize library sizes of samples and negative controls

decontam <- as.data.frame(sample_data(BMV))
decontam$LibrarySize <- sample_sums(BMV)
decontam <- decontam[order(decontam$LibrarySize),]
decontam$Index <- seq(nrow(decontam))
ggplot(data=decontam, aes(x=Index, y=LibrarySize, color=Control)) + 
  geom_point()+
  ggtitle("Library sizes")

1.1.3.2 Detect contaminants

sample_data(BMV)$is.neg <- sample_data(BMV)$Control == "Yes"
contamdf.prev <- isContaminant(BMV, method="prevalence", neg="is.neg")

Number of contaminating contigs:

table(contamdf.prev$contaminant)
## 
## FALSE  TRUE 
##  3406   634

1.1.3.3 Visualize prevalence of contaminants in samples and negative controls

ps.pa <- transform_sample_counts(BMV, function(abund) 1*(abund>0))
ps.pa.neg <- prune_samples(sample_data(ps.pa)$Control == "Yes", ps.pa)
ps.pa.pos <- prune_samples(sample_data(ps.pa)$Control == "No", ps.pa)

df.pa <- data.frame(pa.pos=taxa_sums(ps.pa.pos), pa.neg=taxa_sums(ps.pa.neg),
                    contaminant=contamdf.prev$contaminant)
ggplot(data=df.pa, aes(x=pa.neg, y=pa.pos, color=contaminant)) + geom_point() +
  xlab("Prevalence (Negative Controls)") + ylab("Prevalence (True Samples)")+
  ggtitle("Prevalence of contaminants in samples vs NCs")

1.1.3.4 Remove negative controls and contaminants from phyloseq object

ps.noncontam <- prune_taxa(!contamdf.prev$contaminant, BMV)
ps.noncontam
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 3406 taxa and 207 samples ]
## sample_data() Sample Data:       [ 207 samples by 11 sample variables ]
## tax_table()   Taxonomy Table:    [ 3406 taxa by 7 taxonomic ranks ]
ps.noncontam <- prune_samples(sample_data(BMV)$Control!='Yes', ps.noncontam)
BMV <- ps.noncontam
BMV
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 3406 taxa and 197 samples ]
## sample_data() Sample Data:       [ 197 samples by 11 sample variables ]
## tax_table()   Taxonomy Table:    [ 3406 taxa by 7 taxonomic ranks ]

Remove negative controls from metadata table

meta <- meta[meta$SKA_Subspecies!="control",]

1.1.4 Subset virome data

1.1.4.1 Subset only viruses and remove prokaryotic viruses and EVEs

BMV.V <- subset_taxa(BMV, Kingdom=="Viruses")
BMV.V
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 214 taxa and 197 samples ]
## sample_data() Sample Data:       [ 197 samples by 11 sample variables ]
## tax_table()   Taxonomy Table:    [ 214 taxa by 7 taxonomic ranks ]
EVE_phage <- c("Atrato Retro-like virus", "Gurupi chuvirus-like 1", "Aedes aegypti To virus 1",
  "Aedes aegypti To virus 2", "Guato virus", "Kaiowa virus", "Atrato Chu-like virus 1",
  "Chuvirus Mos8Chu0", "Chibugado virus", "Prokaryotic dsDNA virus sp.")

BMV.V2 <- subset_taxa(BMV.V, !is.element(Species, EVE_phage))
BMV.V2 <- subset_taxa(BMV.V2, Order!="Caudovirales")
BMV.V2 <- subset_taxa(BMV.V2, Phylum!="Phage")
BMV.V2
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 147 taxa and 197 samples ]
## sample_data() Sample Data:       [ 197 samples by 11 sample variables ]
## tax_table()   Taxonomy Table:    [ 147 taxa by 7 taxonomic ranks ]

Info on sample variables and level of taxonomic ranks:

sample_variables(BMV.V2)
##  [1] "Sample"         "PCR_Subspecies" "SKA_Subspecies" "Species"       
##  [5] "Sex"            "Location"       "Control"        "lat"           
##  [9] "long"           "Municipality"   "is.neg"
rank_names(BMV.V2)
## [1] "Kingdom" "Phylum"  "Class"   "Order"   "Family"  "Genus"   "Species"

1.1.4.2 Agglomerate taxa on viral species level

BMV_species <- BMV.V2 %>%
  tax_glom(taxrank = "Species")
BMV_species
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 47 taxa and 197 samples ]
## sample_data() Sample Data:       [ 197 samples by 11 sample variables ]
## tax_table()   Taxonomy Table:    [ 47 taxa by 7 taxonomic ranks ]
vcount <- as.data.frame(colSums(otu_table(BMV_species) != 0))

names(vcount) <- "viral_count"

vcount %>% 
  group_by(viral_count) %>% 
  count() %>% 
  ggplot(aes(x=viral_count, y=n))+
  geom_col()+
  theme_bw()

df <- merge(meta, vcount, by=0) %>% 
  group_by(viral_count, SKA_Subspecies) %>% 
  count()

p <- ggplot(df, aes(x=viral_count, y=n, label=n, fill=SKA_Subspecies))+
  geom_col()+
  scale_x_continuous(n.breaks = max(df$viral_count), name = "# viral species")+
  scale_y_continuous(breaks=seq(0, 125, 25), name = "# mosquitoes")+
  scale_fill_viridis_d(begin=0.4, end=0.9, name="")+
  #geom_text(position =position_stack(vjust=.5), size=3)+
  theme_bw()+
  theme(legend.text = element_text(face="italic"),
        #panel.grid.major = element_blank(),
        panel.grid.minor = element_blank())
p

Only keep taxa with more than 0 reads:

BMV_species <- prune_taxa(taxa_sums(BMV_species) > 0, BMV_species)
BMV_species
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 45 taxa and 197 samples ]
## sample_data() Sample Data:       [ 197 samples by 11 sample variables ]
## tax_table()   Taxonomy Table:    [ 45 taxa by 7 taxonomic ranks ]

Only keep samples with more than 0 viral reads:

BMV_final <- prune_samples(sample_sums(BMV_species) > 0, BMV_species)
BMV_final
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 45 taxa and 82 samples ]
## sample_data() Sample Data:       [ 82 samples by 11 sample variables ]
## tax_table()   Taxonomy Table:    [ 45 taxa by 7 taxonomic ranks ]

1.1.5 Prepare relative abundance data

Agglomerate taxa on viral Family level:

BMV_family <- BMV_final %>%
  tax_glom(taxrank = "Family")
BMV_family
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 26 taxa and 82 samples ]
## sample_data() Sample Data:       [ 82 samples by 11 sample variables ]
## tax_table()   Taxonomy Table:    [ 26 taxa by 7 taxonomic ranks ]

Calculate relative abundance of viral families:

BMV_family_re.abund <- BMV_family %>%                   
  transform_sample_counts(function(x) {round(100*(x/sum(x)),5)} )
BMV_family_re.abund
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 26 taxa and 82 samples ]
## sample_data() Sample Data:       [ 82 samples by 11 sample variables ]
## tax_table()   Taxonomy Table:    [ 26 taxa by 7 taxonomic ranks ]

Count mosquito species per location:

sp_count <- psmelt(BMV_family_re.abund) %>% 
  select(Sample,SKA_Subspecies,Municipality) %>% 
  distinct() %>% 
  group_by(Municipality) %>% 
  count(SKA_Subspecies) %>% 
  pivot_wider(names_from=Municipality, values_from = n) %>% 
  mutate(across(everything(), .fns = ~replace_na(.,0))) %>% 
  rename("Villers_Le_Bouillet" = `Villers-Le-Bouillet`)

Create list of locations:

loclist<-colnames(sp_count)[2:ncol(sp_count)]
rev(loclist)
## [1] "Vrasene"             "Villers_Le_Bouillet" "Natoye"             
## [4] "Maasmechelen"        "Leuven"              "Frameries"          
## [7] "Eupen"               "Bertem"

Create list of barplots with number of mosquitoes per location:

plist <- list(ggplot()+theme_void()+#ggtitle("Mosquito species")+
                theme(title = element_text(size = 6)))
for (i in rev(loclist)){
  plist[[i]] <- ggplot(sp_count, aes_string(x = "SKA_Subspecies", y=i, fill="SKA_Subspecies"))+
    geom_col()+
    #scale_color_viridis_d(begin=0.4, end=0.9)+
    scale_fill_viridis_d(begin=0.4, end=0.9, name="")+
    theme_classic()+
    theme(legend.position = "none",
          axis.ticks.x=element_blank(),
          axis.text.x = element_blank(),
          axis.title.x = element_blank(),
          axis.title.y = element_blank(),
          plot.title = element_text(hjust = 0.5, size=10),
          panel.background = element_rect(fill = "transparent"),
          plot.background = element_rect(fill = "transparent", color = NA),
          axis.line = element_line(colour = 'black', size = 0.25),
          axis.ticks = element_line(colour = "black", size = 0.25))+
    #ggtitle(gsub("_","-",i))+
    scale_y_continuous(limits = c(0,20), expand = c(0,0), position = "right")
}
plist[10] <- list(ggplot()+theme_void())
ggarrange(plotlist = plist[2:9], labels = gsub("_","-",rev(loclist)), 
          font.label = list(size=10, face="plain"), hjust = 0, vjust = 1)

Create colorpalette for viral families:

BMV_smelt <- psmelt(BMV_final)
FamLevel <- levels(as.factor(BMV_smelt[(BMV_smelt$Family!="unclassified"),]$Family))
FamLevel <- c(FamLevel, "unclassified")
BMV_smelt$Family <- factor(BMV_smelt$Family, levels = FamLevel)

myFamCol <- c(stepped(n=20), "#E6550D", "#FD8D3C", "#FDAE6B","lightgrey")
names(myFamCol) <- levels(as.factor(BMV_smelt$Family))

Create colors for each mosquito species:

myColors <- viridis::viridis(4,1, begin=0.4, end = 0.9, direction = 1)
names(myColors) <- levels(as.factor(BMV_smelt$SKA_Subspecies))

1.1.6 Prepare heatmap data

Make table with fungi reads:

BMV_fungi <- subset_taxa(BMV, Phylum %in% c("Ascomycota", "Basidiomycota", 
                                            "Chytridiomycota", "Glomeromycota",
                                            "Zygomycota","Neocallimastigomycota",
                                            "Microsporidia"))
BMV_fungi
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 112 taxa and 197 samples ]
## sample_data() Sample Data:       [ 197 samples by 11 sample variables ]
## tax_table()   Taxonomy Table:    [ 112 taxa by 7 taxonomic ranks ]
BMV_fungi <- tax_glom(BMV_fungi, taxrank = 'Phylum')

sample_variables(BMV_fungi)
##  [1] "Sample"         "PCR_Subspecies" "SKA_Subspecies" "Species"       
##  [5] "Sex"            "Location"       "Control"        "lat"           
##  [9] "long"           "Municipality"   "is.neg"
fungi <- as.data.frame(otu_table(BMV_fungi))
fungi2 <- fungi %>% 
  mutate(Phylum=rownames(.), .before=1) %>% 
  pivot_longer(cols = 2:198, names_to="Sample", values_to = "fungi_reads") %>% 
  pivot_wider(names_from = Phylum, values_from = fungi_reads)
fungi2
tax_table(BMV_fungi)
## Taxonomy Table:     [4 taxa by 7 taxonomic ranks]:
##                                                 Kingdom     Phylum           
## NODE_31_length_1205_cov_6.800532_NEMO53|full    "Eukaryota" "Ascomycota"     
## NODE_8_length_3532_cov_13.946454_NEMO31|full    "Eukaryota" "Chytridiomycota"
## NODE_58_length_1305_cov_1096.565961_NEMO36|full "Eukaryota" "Microsporidia"  
## NODE_129_length_1223_cov_14.773124_MEMO055|full "Eukaryota" "Basidiomycota"  
##                                                 Class Order Family Genus
## NODE_31_length_1205_cov_6.800532_NEMO53|full    NA    NA    NA     NA   
## NODE_8_length_3532_cov_13.946454_NEMO31|full    NA    NA    NA     NA   
## NODE_58_length_1305_cov_1096.565961_NEMO36|full NA    NA    NA     NA   
## NODE_129_length_1223_cov_14.773124_MEMO055|full NA    NA    NA     NA   
##                                                 Species
## NODE_31_length_1205_cov_6.800532_NEMO53|full    NA     
## NODE_8_length_3532_cov_13.946454_NEMO31|full    NA     
## NODE_58_length_1305_cov_1096.565961_NEMO36|full NA     
## NODE_129_length_1223_cov_14.773124_MEMO055|full NA
# fungi.df <- pivot_longer(fungi, cols = 1:197, names_to="Sample", values_to = "fungi_reads")
# fungi.df
# meta <- left_join(meta, fungi.df, by="Sample")
# rownames(meta) <- meta$Sample
# sample_data(BMV_final) <- data.frame(meta)

Convert phyloseq object to metagenomeseq object to make heatmap:

BMV_metaseq <- phyloseq_to_metagenomeSeq(BMV_final)

Aggregate by species:

BMV_metaseq_species <- aggregateByTaxonomy(BMV_metaseq, lvl = "Species", norm = F, 
                                           aggfun = colSums, out = "MRexperiment", alternate = T)

Count number of unique viral species:

n_species <- length(unique(featureData(BMV_metaseq_species)$Species))

Assign mosquito species for each sample:

mosquito_species <- pData(BMV_metaseq_species)$SKA_Subspecies

Assign colors to location:

location <- pData(BMV_metaseq_species)$Municipality
unique_locations <- length(unique(pData(BMV_metaseq_species)$Municipality))
locColors <- viridis::plasma(unique_locations, 1, begin=0, end = 0.9, direction = 1)
names(locColors) <- levels(as.factor(pData(BMV_metaseq_species)$Municipality))

Set heatmap colors:

heatmapCols <- colorRampPalette(brewer.pal(9, "YlOrRd"))(200)

Calculate average BLASTx values:

blastx <- read.table("data/BEmosquitoes.tab", header=T, row.names=1, dec=".", sep="\t")
blastx <- dplyr::select(blastx, 2)
blastx.UF <- otu_table(as.matrix(blastx), taxa_are_rows=T)
blastx.ps <- phyloseq(blastx.UF, tax_table(prune_taxa(taxa_sums(BMV.V2) >= 1, BMV.V2)))
blastx_metaseq <- phyloseq_to_metagenomeSeq(blastx.ps)
blastx_metaseq
## MRexperiment (storageMode: environment)
## assayData: 143 features, 1 samples 
##   element names: counts 
## protocolData: none
## phenoData: none
## featureData
##   featureNames: NODE_101_length_1206_cov_27.850310_MEMO031|full
##     NODE_101_length_1285_cov_17.030629_MEMO115|full ...
##     NODE_9_length_5477_cov_48.040926_MEMO031|full (143 total)
##   fvarLabels: OTUname Kingdom ... Species (8 total)
##   fvarMetadata: labelDescription
## experimentData: use 'experimentData(object)'
## Annotation:
#Aggregate by species
blastx_mean = aggregateByTaxonomy(blastx_metaseq, lvl = "Species", norm = F, aggfun = mean, out = "MRexperiment", alternate = T)
blastx_mean
## MRexperiment (storageMode: environment)
## assayData: 45 features, 1 samples 
##   element names: counts 
## protocolData: none
## phenoData: none
## featureData
##   featureNames: Aedes aegypti anphevirus Aedes alboannulatus
##     orthomyxo-like virus ... Yongsan picorna-like virus 2 (45 total)
##   fvarLabels: OTUname Kingdom ... Species (8 total)
##   fvarMetadata: labelDescription
## experimentData: use 'experimentData(object)'
## Annotation:

Store average BLASTx identities in dataframe:

rowanno <- as.data.frame(returnAppropriateObj(blastx_mean, log=F, norm=F))
colnames(rowanno)[1] <- "blastx"

Create color function for BLASTx values:

col_fun=colorRamp2(c(0,100), c("white","deepskyblue4"))

Get Baltimore classification/genome composition for each viral species from ICTV:

taxV <- as.data.frame(tax_table(BMV_final))
baltimore <- read.table("data/ICTV_classification_family.tsv", header=T, sep="\t")

balt_gc <- baltimore %>% 
  select(Family, Genome.Composition) %>% 
  distinct() %>% 
  add_row(Family="Picorna-like", Genome.Composition="ssRNA(+)") %>% 
  add_row(Family="Orthomyxo-like", Genome.Composition="ssRNA(-)") %>% 
  add_row(Family="Negeviridae", Genome.Composition="ssRNA(+)") %>% 
  add_row(Family="Luteoviridae", Genome.Composition="ssRNA(+)")

tax_gc <- left_join(taxV, balt_gc, by="Family") %>%
  filter(!Genome.Composition %in% c("ssRNA(+/-)", "ssDNA(+/-)", "ssDNA(+)", "ssRNA")) %>% # remove double genome compositions
  mutate(Genome.Composition = case_when(Species == 'Sclerotinia sclerotiorum dsRNA virus 3' ~ 'dsRNA',
                                        Family == 'Reo-like'~'dsRNA',
                                        Species == 'unclassified' ~'unknown',
                                        Family == 'Totiviridae'~'dsRNA',
                                        Species == 'Hubei orthoptera virus 4' ~ 'ssRNA(+)',
                                        Species == 'Hubei odonate virus 15' ~ 'dsRNA',
                                        Species == 'Wuhan insect virus 26' ~ 'dsRNA',
                                        Species == 'Wuhan insect virus 13' ~ 'ssRNA(+)',
                                        Species == "Linepithema humile qinvirus-like virus 1" ~'ssRNA(-)',
                                        Species =="Hubei virga-like virus 23" ~ 'ssRNA(+)',
                                        Species == "Culex mononega like virus 2" ~'ssRNA(-)',
                                        TRUE ~ `Genome.Composition`))

gcanno<-tax_gc %>% select(Genome.Composition, Family)
rownames(gcanno)<-tax_gc$Species
colnames(gcanno) <- c('GC', 'Family')
gcanno<-gcanno[order(rownames(gcanno)) , ,drop=F]
gcanno$Family <- factor(gcanno$Family, levels = FamLevel)

1.2 Alpha diversity

alpha <- plot_richness(BMV_final, measures=c("Observed", "Shannon", "Simpson"), x="SKA_Subspecies", color = "SKA_Subspecies")+
  geom_boxplot()+
  geom_jitter(width=0)+
  theme_bw()+
  scale_color_viridis_d(begin=0.4, end =0.9, name="", labels=c(expression(paste(italic("Aedes japonicus"), " (n=8)")),
                                                               expression(paste(italic("Culex pipiens molestus"), " (n=23)")),
                                                               expression(paste(italic("Culex pipiens pipiens"), " (n=48)")),
                                                               expression(paste(italic("Culex torrentium"), " (n=3)"))))+
  theme(strip.text.x = element_text(size = 10),
        axis.title.x = element_blank(),
        axis.text.x = element_blank(),
        axis.ticks.x = element_blank(),
        legend.text.align = 0)+
  scale_y_continuous(limits = c(0, NA))+
  stat_compare_means(method = "wilcox.test", aes_string(x="SKA_Subspecies", y="value",
                                                        group="SKA_Subspecies"), 
                     label = "p.format", hide.ns=TRUE, size=3, show.legend = F, tip.length = 0.01,
                     comparisons = list(c(1, 2),c(1, 3)))
alpha

ggsave("figures/alpha-diversity-species.pdf", alpha, dpi=300)

1.3 Ordination

1.3.1 PCoA

v.ord <- ordinate(BMV_final, method = "PCoA")
pcoa <- plot_ordination(BMV_final, v.ord, type="samples", color="SKA_Subspecies", shape = "Municipality")+
  scale_shape_manual(values = c(15,16,17,3:8), guide =guide_legend(label.theme = element_text(size=10)), name="Location")+
  scale_color_viridis_d(begin=0, end =1, name="Mosquito species")+
  #stat_ellipse(type = "norm", linetype = 2, aes_string(group="SKA_Subspecies"))+
  theme_bw()+
  theme(panel.grid.minor = element_blank())+
  guides(col = guide_legend(override.aes = list(shape = 15, size = 5),
                            label.theme = element_text(size=10, face="italic")))
pcoa

ggsave("figures/PCoA-eukaryotic-virome.pdf", pcoa, dpi=300)
?ggsave

1.3.2 NMDS full dataset

v.ord <- ordinate(BMV_final, method = "NMDS", k=2)
## Square root transformation
## Wisconsin double standardization
## Run 0 stress 0.007200553 
## Run 1 stress 0.003517962 
## ... New best solution
## ... Procrustes: rmse 0.09947471  max resid 0.2433535 
## Run 2 stress 0.003156862 
## ... New best solution
## ... Procrustes: rmse 0.1071991  max resid 0.3316016 
## Run 3 stress 0.002100666 
## ... New best solution
## ... Procrustes: rmse 0.1035351  max resid 0.3336336 
## Run 4 stress 0.00293833 
## Run 5 stress 0.002949758 
## Run 6 stress 0.003798832 
## Run 7 stress 0.004203698 
## Run 8 stress 0.003818626 
## Run 9 stress 0.003143299 
## Run 10 stress 0.002461951 
## ... Procrustes: rmse 0.1013816  max resid 0.3788131 
## Run 11 stress 0.003315343 
## Run 12 stress 0.002781012 
## Run 13 stress 0.006250634 
## Run 14 stress 0.005715855 
## Run 15 stress 0.002631668 
## Run 16 stress 0.003703236 
## Run 17 stress 0.002557906 
## ... Procrustes: rmse 0.1065592  max resid 0.3547282 
## Run 18 stress 0.002522315 
## ... Procrustes: rmse 0.1015723  max resid 0.3334616 
## Run 19 stress 0.004140511 
## Run 20 stress 0.001923616 
## ... New best solution
## ... Procrustes: rmse 0.1084025  max resid 0.3764071 
## *** No convergence -- monoMDS stopping criteria:
##     20: no. of iterations >= maxit
nmds <- plot_ordination(BMV_final, v.ord, type="samples", color="SKA_Subspecies", shape = "Municipality")+
  scale_shape_manual(values = c(15,16,17,3:8), guide =guide_legend(label.theme = element_text(size=10)), name="Location")+
  scale_color_viridis_d(begin=0, end =1, name="Mosquito species")+
  #stat_ellipse(type = "t", linetype = 2, aes_string(color="SKA_Subspecies"))+
  theme_bw()+
  theme(panel.grid.minor = element_blank())+
  guides(col = guide_legend(override.aes = list(shape = 15, size = 5),
                            label.theme = element_text(size=10, face="italic")))
nmds

ggsave("figures/NMDS-eukaryotic-virome.pdf", nmds, dpi=300)

1.3.3 NMDS singletons removed

BMV_no_singletons <- BMV_final
for (i in c("MEMO014","MEMO078","MEMO145","MEMO146","MEMO149","MEMO084","MEMO018","MEMO006",
            "NEMO27","NEMO30","NEMO07","NEMO46","NEMO51","MEMO127","MEMO008")) {
  BMV_no_singletons <- subset_samples(BMV_no_singletons, Sample != i)
}
BMV_no_singletons
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 45 taxa and 68 samples ]
## sample_data() Sample Data:       [ 68 samples by 11 sample variables ]
## tax_table()   Taxonomy Table:    [ 45 taxa by 7 taxonomic ranks ]
v.ord <- ordinate(BMV_no_singletons, method = "NMDS", k=2)
## Square root transformation
## Wisconsin double standardization
## Run 0 stress 0.006486394 
## Run 1 stress 0.0157372 
## Run 2 stress 0.008241914 
## Run 3 stress 0.006677143 
## ... Procrustes: rmse 0.04002152  max resid 0.08998991 
## Run 4 stress 0.01202926 
## Run 5 stress 0.009612694 
## Run 6 stress 0.01720383 
## Run 7 stress 0.006780447 
## ... Procrustes: rmse 0.03625994  max resid 0.08149941 
## Run 8 stress 0.01070085 
## Run 9 stress 0.009828719 
## Run 10 stress 0.01373342 
## Run 11 stress 0.009939506 
## Run 12 stress 0.00531327 
## ... New best solution
## ... Procrustes: rmse 0.03956133  max resid 0.1050252 
## Run 13 stress 0.008919582 
## Run 14 stress 0.0170108 
## Run 15 stress 0.01989628 
## Run 16 stress 0.01761931 
## Run 17 stress 0.005174196 
## ... New best solution
## ... Procrustes: rmse 0.04129938  max resid 0.1223024 
## Run 18 stress 0.0191238 
## Run 19 stress 0.01371698 
## Run 20 stress 0.01829183 
## *** No convergence -- monoMDS stopping criteria:
##     20: no. of iterations >= maxit
nmds_ns <- plot_ordination(BMV_no_singletons, v.ord, type="samples", color="SKA_Subspecies", shape = "Municipality")+
  scale_shape_manual(values = c(15,16,17,3:8), guide =guide_legend(label.theme = element_text(size=10)), name="Location")+
  scale_color_viridis_d(begin=0, end =1, name="Mosquito species")+
  #stat_ellipse(type = "t", linetype = 2, aes_string(color="SKA_Subspecies"))+
  theme_bw()+
  theme(panel.grid.minor = element_blank())+
  guides(col = guide_legend(override.aes = list(shape = 15, size = 5),
                            label.theme = element_text(size=10, face="italic")))
nmds_ns

ggsave("figures/NMDS-eukaryotic-virome-no-singletons.pdf", nmds_ns, dpi=300)

ord.legend <- cowplot::get_legend(pcoa)
alpha.legend <- cowplot::get_legend(alpha)
plots <- cowplot::align_plots(alpha+theme(legend.position = "none"), 
                              pcoa+theme(legend.position = "none"), align = 'v', axis = 'l')
bottom<-cowplot::plot_grid(plots[[2]], 
                           nmds+theme(legend.position = "none"), labels=c("B","C"))
cowplot::plot_grid(plots[[1]], alpha.legend, bottom, ord.legend, labels = c('A', ''), ncol=2, rel_widths = c(1, .3))

#ggarrange(alpha, ggarrange(pcoa, nmds, labels=c("B","C"), common.legend = T, legend = "right"), labels="A", ncol=1)
ggsave("figures/combined-alpha-ordination.pdf", dpi=300)

1.4 Relative abundance

1.4.1 Relative abundance per location

phylobar_location <- merge_samples(BMV_family, "Municipality")
phylobar_location_re.abund <- phylobar_location %>%                   
  transform_sample_counts(function(x) {round(100*(x/sum(x)),5)} )

lc <- plot_relabund(phylobar_location_re.abund, fill = "Family") + 
  geom_bar(aes(fill=Family), stat="identity", position="stack")+
  ylab("Relative abundance (%)")+
  xlab("")+
  theme_bw()+
  theme(axis.text.x = element_text(angle=0, size = 10, vjust = 0.5, hjust = 0.5),
        axis.text.y = element_text(size = 10),
        axis.title.y = element_text(size = 12, vjust=0.5),
        axis.title.x = element_text(size = 12,hjust = 0.5),
        axis.ticks.y = element_blank(),
        legend.text = element_text(face="italic"))+
  scale_fill_manual(values = myFamCol, name="Viral family")+
  scale_y_continuous(expand = c(0,0))+
  coord_flip()

Combine viral family barplot with mosquito species barplots from each location:

lc2<-ggarrange(lc+theme(legend.position = "none"), ggarrange(plotlist=plist, nrow=10, heights = c(0.2, rep(1,8),0.6)), ncol=2, widths = c(1,0.1))
ggsave("figures/viralfamily_barplot_loc.pdf", lc2, height = 6.92, width = 11.2, dpi=300)

1.4.2 Relative abundance per mosquito species

phylobar_species <- merge_samples(BMV_family, "SKA_Subspecies")
phylobar_species_re.abund <- phylobar_species %>%                   
  transform_sample_counts(function(x) {round(100*(x/sum(x)),5)} )

sp <- plot_relabund(phylobar_species_re.abund, fill = "Family") + 
  geom_bar(aes(fill=Family), stat="identity", position="stack")+
  ylab("Relative abundance (%)")+
  xlab("")+
  theme_bw()+
  theme(axis.text.x = element_text(angle=0, size = 10, vjust = 0.5, hjust = 0.5, face="italic"),
        axis.text.y = element_text(size = 10),
        axis.title.y = element_text(size = 12, vjust=0.5),
        axis.title.x = element_text(size = 12,hjust = 0.5),
        axis.ticks.x = element_blank(),
        legend.text = element_text(face="italic"),
        legend.title = element_blank())+
  scale_fill_manual(values = myFamCol, name="Viral family")+
  scale_y_continuous(expand = c(0,0))+
  scale_x_discrete(guide = guide_axis(n.dodge=2))
ggsave("figures/viralfamily_barplot_species.pdf", sp , height = 6.92, width = 11.2, dpi=300)

Create combined legend for viral families and mosquito species:

sp2 <- plot_relabund(phylobar_species_re.abund, fill="Family", color="Sample")+
  theme_bw()+
  theme(legend.text = element_text(face="italic"),
        legend.title = element_blank(),
        legend.position = "top")+
  scale_fill_manual(values = myFamCol, name="a")+
  scale_color_manual(values = myColors, name="b")+
  guides(fill=guide_legend(nrow = 4),
         col=guide_legend(override.aes =list(fill = myColors), direction = "vertical"))
sp_leg <- get_legend(sp2)

1.4.3 Combine relative abundances

ggarrange(sp, lc2, common.legend = T, legend = "top", labels ='AUTO',
          font.label = list(size = 16), widths = c(0.5,1), legend.grob = sp_leg)

ggsave("figures/viralfamily_barplot.pdf", width = 15, height=9, dpi=300)

1.5 Eukaryotic virome heatmap

#p.fungi<-log10(pData(BMV_metaseq_species)$fungi_reads+1)
#p.fungi<-pData(BMV_metaseq_species)$fungi_reads
#p.fungi<-as.matrix(fungi2[fungi2$Sample %in% pData(BMV_metaseq_species)$Sample,2:5])
p.fungi<-log10(as.matrix(fungi2[fungi2$Sample %in% pData(BMV_metaseq_species)$Sample,2:5]))
p.fungi[p.fungi==-Inf]<-NA
colnames(p.fungi)<-as.data.frame(tax_table(BMV_fungi))$Phylum

left_ra=rowAnnotation('Family'=gcanno$Family,
                      "Blastx"= rowanno$blastx,
                      col=list('Family'=myFamCol,
                               "Blastx"=col_fun),
                      show_annotation_name=T,
                      annotation_name_side = "top",
                      annotation_name_gp = gpar(fontsize = 10, fontface = "bold"),
                      annotation_name_rot = 270,
                      annotation_legend_param=list("Blastx" = list(title ="Blastx % Identity", 
                                                                   direction="horizontal",
                                                                   at=c(0,25,50,75,100)),
                                                   "Family"=list(title="Viral family", labels_gp = gpar(fontface="italic"))))

#Draw heatmap
column_ha = HeatmapAnnotation(Location=location,
                              'Mosquito species' = mosquito_species,
                              Fungi = anno_points(p.fungi, ylim = c(0, 6),
                                                   axis_param = list(side = "right"),
                                                  gp = gpar(fill = c(2:4,7), col = c(2:4,7)),
                                                  pch=1:length(colnames(p.fungi))),
                              show_annotation_name = T,
                              annotation_label = gt_render(c("Location", "Species", "log<sub>10</sub>(Fungi)")),
                              annotation_name_side = "right",
                              annotation_name_gp = gpar(fontsize = 10, fontface = "bold"),
                              annotation_name_rot = 0,
                              annotation_legend_param=list('Mosquito species'= list(title ='Mosquito species', 
                                                                                    labels_gp = gpar(fontface="italic")),
                                                           'Location' = list(title='Location')),
                              col = list('Mosquito species'=c(myColors), Location=c(locColors)))

#n depends on the amount of taxa in 'BMV_metaseq_species'
hm <- plot_abundance(BMV_metaseq_species, n = n_species, log = T, norm = F, colclust = "bray",
                     col = heatmapCols, name = "Log2 Read Counts", 
                     top_annotation=column_ha,
                     row_names_gp = gpar(fontsize = 8),
                     show_column_names = FALSE,
                     heatmap_legend_param = list(direction = "horizontal"),
                     cluster_rows = FALSE,
                     cluster_row_slices = FALSE,
                     row_split=factor(gcanno$GC, 
                                      levels = c("dsDNA","ssDNA","dsRNA","ssRNA(+)","ssRNA(-)","unknown")),
                     row_order=tax_gc[with(tax_gc, order(tax_gc$Genome.Composition, Family)),"Species"],
                     left_annotation=left_ra,
                     row_title_gp = gpar(fontsize=10),
                     row_title_rot=0,
                     border=T)

Legends

lgd_list<-list(Legend(labels = colnames(p.fungi), title = "Fungi", type = "points", pch = 1:length(colnames(p.fungi)), 
            legend_gp = gpar(col = c(2:4,7)), background ="transparent"))
draw(hm, heatmap_legend_side = "left", annotation_legend_side = "left", 
     merge_legend = T, legend_grouping="original",
     annotation_legend_list = lgd_list)


2 RT-qPCR analysis

2.1 Prepare the data

library(ggh4x)
qPCR <- read_csv("data/BMV-RT-qPCR-analysis.csv", col_names = T)

Recalculate quantity based on mosquito dilutions:

qPCR <- qPCR %>%  
  mutate(Quantity = case_when(str_detect(Sample, "MEMO") ~ qPCR$"Quantity Mean" * 120,
                              str_detect(Sample, "NEMO") ~ qPCR$"Quantity Mean" * 300))

Divide quantity of Xanthi chryso-like virus by 2 because it is a dsRNA virus:

qPCR <- qPCR %>%  
  mutate(Quantity = case_when(str_detect(Target, "XCV") ~ qPCR$Quantity/2,
                              TRUE ~ Quantity))

Replace NA to 0:

qPCR_noNA <- qPCR %>% mutate_all(~replace(., is.na(.), 0))

Replace abbreviations for full names:

qPCR_noNA <- qPCR_noNA %>% mutate(Target, Target= case_when(Target=="CPV"~"Culex phasma virus", 
                                                            Target=="XCV"~"Xanthi chryso-like virus", 
                                                            Target=="WMV6"~"Wuhan Mosquito Virus 6",
                                                            Target=="WMV4"~"Wuhan Mosquito Virus 4",
                                                            Target=="Daeseongdong"~"Daeseongdong virus",
                                                            Target=="HMV4"~"Hubei mosquito virus 4",
                                                            TRUE ~ Target))

Merge metadata with qPCR data: Abrreviate locations to fit the names on the plots later on.

metadata <- meta %>% mutate(Municipality = case_when(Municipality == 'Frameries' ~ 'Fr',
                                                         Municipality == 'Dilsen-Stokkem' ~ 'DS',
                                                         Municipality == 'Kallo' ~ 'K',
                                                         TRUE ~ Municipality))
metaqPCR <- merge.data.frame(qPCR_noNA, metadata, by="Sample")

Count number of tested samples per mosquito species:

metaqPCR %>%
  select(Sample, SKA_Subspecies) %>% 
  distinct() %>% 
  count(SKA_Subspecies)

2.2 Plot data

legendlabels <- c(expression(paste(italic("Aedes japonicus"), " (n=8)")),
  expression(paste(italic("Culex pipiens molestus"), " (n=47)")),
  expression(paste(italic("Culex pipiens pipiens"), " (n=127)")),
  expression(paste(italic("Culex torrentium"), " (n=16)")))

qPCRviolin <- ggplot(metaqPCR, aes(x=SKA_Subspecies, y=Quantity))+
  geom_violin(aes(fill=SKA_Subspecies, color=SKA_Subspecies), position="identity")+
  geom_jitter(aes(color=SKA_Subspecies), size=1, width=0.1)+
  theme_bw()+
  theme(axis.text.x = element_blank(),
        axis.ticks.x = element_blank(),
        #panel.grid.major = element_blank(),
        panel.grid.major.y = element_blank(),
        legend.text.align = 0)+
  scale_y_continuous(expand=expansion(mult = c(0, 0), add = c(0, 0.1)), 
                     trans=scales::pseudo_log_trans(base = 10), 
                     breaks = c(0, 10^seq(2,8,2)),
                     labels = scientific_10(c(0, 10^seq(2,8,2))))+
  xlab(label = "")+
  ylab(label = "Viral copies per mosquito")+
  scale_fill_viridis_d(begin=0.4, end =0.9, name="", alpha = 0.5,
                       labels=legendlabels)+
  scale_color_viridis_d(begin=0.4, end =0.9, name="",
                        labels=legendlabels)+ 
  guides(fill = guide_legend(override.aes = list(alpha = 1)))+
  facet_wrap(~Target, nrow=2, ncol=3)
ggsave("figures/qPCR-violin.pdf", dpi=300)

Plot qPCR overview per sample:

qPCRoverview <- ggplot(metaqPCR, 
             aes(x=Sample, y=Quantity))+
  facet_nested(Target~Municipality,
               space="free",
               scales = "free_x",
               labeller = labeller(Target = label_wrap_gen(10), Municipality=label_wrap_gen(5)))+
  geom_col(aes(fill=SKA_Subspecies), position="identity", alpha=1)+
  theme_bw()+
  theme(axis.text.x = element_blank(),
        panel.grid.major = element_blank())+
  scale_y_continuous(expand=expansion(mult = c(0, 0), add = c(0, 0.1)),
                     trans = scales::pseudo_log_trans(base = 10), 
                     breaks = c(0, 10^seq(2,8,2)),
                     labels = scientific_10(c(0, 10^seq(2,8,2))))+
  xlab(label = "Sample")+
  ylab(label = "Viral copies per mosquito")+
  scale_fill_viridis_d(begin=0.4, end =0.9, name="")+ 
  guides(fill = guide_legend(override.aes = list(alpha = 1), 
                             label.theme = element_text(size=10, angle = 0, face = "italic")))
ggsave("figures/qPCR_overview.pdf", width = 18.7, height = 10.3, units = "in", dpi=300)
DS=Dilsen-Stokkem, Fr=Frameries, K=Kallo

DS=Dilsen-Stokkem, Fr=Frameries, K=Kallo


3 Phylogenetics

phylo_path <- here("phylogenetics")
phylo_packages <- c('phytools', 'ggtree', 'treeio', 'rphylopic',
                    'ggimage', 'aplot')
lapply(phylo_packages, library, character.only = TRUE)

3.1 Bunyavirales

meta.bunya <- do.call(rbind, lapply(list.files(path=here(phylo_path, "Bunyavirales/"), pattern = "*.csv", full.names = T), read_csv))
bunya.ictv <- do.call(rbind, lapply(list.files(path=here(phylo_path, "Bunyavirales/"), pattern = "*_ictv.csv", full.names = T), read_csv))
bunya <- read.iqtree(here(phylo_path, "Bunyavirales/bunyavirales.treefile"))
bunya@phylo <- phytools::midpoint.root(bunya@phylo)

bunya@phylo$tip.label <- gsub(bunya@phylo$tip.label, pattern = "_", replacement=" ")
bunya@phylo$tip.label <- gsub(bunya@phylo$tip.label, pattern = "P ", replacement="P_")
bunya@phylo$tip.label <- gsub(bunya@phylo$tip.label, pattern = "lcl\\|", replacement="lcl_")

bunya_clean <- clean_phylometa(bunya, metadata = meta.bunya)
bunya_ictv_clean <- clean_phylometa(bunya, metadata = bunya.ictv)

bunya_tree <- group_phylotaxa(bunya, bunya_clean, group = c("Family","Genus", "Host", "Geo_Location"))
bunya_tree
## 'treedata' S4 object that stored information
## of
##  '/Users/lander/Library/CloudStorage/OneDrive-KULeuven/Documents/Manuscripts/Belgian
## mosquitoes
## (2022)/BMV-analysis/phylogenetics/Bunyavirales/bunyavirales.treefile'.
## 
## ...@ phylo:
## 
## Phylogenetic tree with 89 tips and 88 internal nodes.
## 
## Tip labels:
##   QDZ58984.1|Niukluk phantom orthophasmavirus, YP_009362029.1|Kigluaik phantom
## orthophasmavirus, ASA47399.1|Culex orthophasmavirus, ASA47278.1|Culex
## orthophasmavirus, QGA70910.1|Anjon virus, lcl ORF1 NODE 1 length 6489 cov
## 89.025421 MEMO067 33 6368, ...
## Node labels:
##   Root, 100, 100, 100, 100, 100, ...
## 
## Rooted; includes branch lengths.
## 
## with the following features available:
##   'SH_aLRT', 'UFboot'.
## 
## # The associated data tibble abstraction: 177 × 9
## # The 'node', 'label' and 'isTip' are from the phylo tree.
##     node label         Family   Genus  Host    Geo_Location isTip SH_aLRT UFboot
##    <int> <chr>         <fct>    <fct>  <fct>   <fct>        <lgl>   <dbl>  <dbl>
##  1     1 QDZ58984.1|N… Phasmav… Ortho… unknown USA          TRUE       NA     NA
##  2     2 YP_009362029… Phasmav… Ortho… Chaobo… USA          TRUE       NA     NA
##  3     3 ASA47399.1|C… Phasmav… Ortho… Culex … Australia    TRUE       NA     NA
##  4     4 ASA47278.1|C… Phasmav… Ortho… Culex … Australia    TRUE       NA     NA
##  5     5 QGA70910.1|A… unclass… uncla… Culex   Sweden       TRUE       NA     NA
##  6     6 lcl ORF1 NOD… NODE     NODE   NODE    NODE         TRUE       NA     NA
##  7     7 QTW97787.1|W… Phasmav… Ortho… Culici… China        TRUE       NA     NA
##  8     8 YP_009305135… Phasmav… Ortho… Culex … China        TRUE       NA     NA
##  9     9 QHA33845.1|C… Phasmav… uncla… Manson… Colombia     TRUE       NA     NA
## 10    10 QRW41769.1|M… unclass… uncla… Culex … USA          TRUE       NA     NA
## # … with 167 more rows

Give clean metadata ictv!

bunyacladedf <- mrca_ictv(tree = bunya_tree, meta = bunya_ictv_clean, 
                          group="Genus")

famvec <- sort(unique(bunya_clean$Family))
col <- c(viridis::plasma(n = length(famvec)-1, begin=0.4, end=0.9))
names(col) <- famvec[famvec!="unclassified"]
col <- c(col, unclassified="grey", NODE="#43BF71FF")
shape <- rep(16, length(famvec))
names(shape) <- famvec
shape <- c(shape, NODE=17)

bunya_tree@phylo$tip.label <- gsub(bunya_tree@phylo$tip.label, pattern = "lcl ORF[0-9]* ", replacement="", perl = T)
                   
p <- plot_phylotree(bunya_tree, col=col, shape=shape, cladedf = bunyacladedf, 
                    labels=c(famvec, "Belgian mosquitoes"),
               plainlabels = c("unclassified", "Belgian mosquitoes"), align=F)+
  geom_nodepoint(aes(subset= node %in% bunyacladedf$node), size=1, shape=18)+
  scale_y_reverse()+
  xlim(-.05,6)

bunyaplot <- addSmallLegend(p, pointSize = 2.5, textSize = 5, spaceLegend = .5)+
  theme(legend.text = element_text(vjust = .4))
bunyaplot

ggsave(filename = here(phylo_path, "Bunyavirales/bunyaviridae.pdf"), bunyaplot, dpi=1200, width=6.875)

bunyaplot_minimal <- plot_minimal_phylotree(bunya_tree, align=T)+
  scale_y_reverse()+
  xlim(-.05,6.5)
bunyaplot_minimal

ggsave(filename = here(phylo_path, "Bunyavirales/bunyaviridae_minimal.pdf"), bunyaplot_minimal, dpi=1200, width=6.875)

3.2 Endornaviridae

meta.endorna <- do.call(rbind, lapply(list.files(path=here(phylo_path, "Endornaviridae/"), pattern = "*.csv", full.names = T), read_csv))
endorna.ictv <- read_csv(here(phylo_path, "Endornaviridae/endornaviridae_ictv.csv"))
endorna <- read.iqtree(here(phylo_path, "Endornaviridae/endornaviridae.treefile"))
endorna@phylo <- phytools::midpoint.root(endorna@phylo)

endorna@phylo$tip.label <- gsub(endorna@phylo$tip.label, pattern = "_", replacement=" ")
endorna@phylo$tip.label <- gsub(endorna@phylo$tip.label, pattern = "P ", replacement="P_")
endorna@phylo$tip.label <- gsub(endorna@phylo$tip.label, pattern = "lcl\\|", replacement="lcl_")

endorna_clean <- clean_phylometa(endorna, metadata = meta.endorna)
endorna_ictv_clean <- clean_phylometa(endorna, metadata = endorna.ictv)

endorna_tree <- group_phylotaxa(endorna, endorna_clean, group = c("Family","Genus", "Host", "Geo_Location"))
endorna_tree
## 'treedata' S4 object that stored information
## of
##  '/Users/lander/Library/CloudStorage/OneDrive-KULeuven/Documents/Manuscripts/Belgian
## mosquitoes
## (2022)/BMV-analysis/phylogenetics/Endornaviridae/endornaviridae.treefile'.
## 
## ...@ phylo:
## 
## Phylogenetic tree with 35 tips and 34 internal nodes.
## 
## Tip labels:
##   AJA41110.1|Alternaria brassicicola betaendornavirus 1, BAT32944.1|Rosellinia
## necatrix betaendornavirus 1, ADU64759.1|Tuber aestivum betaendornavirus,
## AXX39023.1|Sclerotinia minor betaendornavirus 1, ABD73305.1|Gremmeniella
## abietina betaendornavirus 1, AHN50398.1|Sclerotinia sclerotiorum
## betaendornavirus 1, ...
## Node labels:
##   Root, 100, 98, 99, 100, 100, ...
## 
## Rooted; includes branch lengths.
## 
## with the following features available:
##   'SH_aLRT', 'UFboot'.
## 
## # The associated data tibble abstraction: 69 × 9
## # The 'node', 'label' and 'isTip' are from the phylo tree.
##     node label         Family   Genus  Host    Geo_Location isTip SH_aLRT UFboot
##    <int> <chr>         <fct>    <fct>  <fct>   <fct>        <lgl>   <dbl>  <dbl>
##  1     1 AJA41110.1|A… Endorna… Betae… Altern… China        TRUE       NA     NA
##  2     2 BAT32944.1|R… Endorna… Betae… Rosell… Japan        TRUE       NA     NA
##  3     3 ADU64759.1|T… Endorna… Betae… Tuber … Hungary      TRUE       NA     NA
##  4     4 AXX39023.1|S… Endorna… Betae… Sclero… China        TRUE       NA     NA
##  5     5 ABD73305.1|G… Endorna… Betae… Gremme… Finland      TRUE       NA     NA
##  6     6 AHN50398.1|S… Endorna… Betae… Sclero… New Zealand  TRUE       NA     NA
##  7     7 AOZ66245.1|B… Endorna… Betae… Botryt… China        TRUE       NA     NA
##  8     8 QHB50297.1|B… Endorna… Alpha… Pipera… Slovakia     TRUE       NA     NA
##  9     9 YP_009506353… Endorna… Alpha… Phaseo… Brazil       TRUE       NA     NA
## 10    10 BBN67127.1|F… Endorna… uncla… Fagopy… Japan        TRUE       NA     NA
## # … with 59 more rows

Give clean metadata ictv!

endornacladedf <- mrca_ictv(tree= endorna_tree, meta = endorna_ictv_clean,
                            group = "Genus")

famvec <- sort(unique(endorna_clean$Genus))
col <- c(viridis::plasma(n = length(famvec)-1, begin=0.7, end=0.9))
names(col) <- famvec[famvec!="unclassified"]
col <- c(col, unclassified="grey", NODE="#43BF71FF")
shape <- rep(16, length(famvec))
names(shape) <- famvec
shape <- c(shape, NODE=17)

endorna_tree@phylo$tip.label <- gsub(endorna_tree@phylo$tip.label, pattern = "lcl ORF[0-9]* ", replacement="", perl = T)

p <- plot_phylotree(endorna_tree, col=col, shape=shape, cladedf = endornacladedf, labels=c(famvec, "Belgian mosquitoes"), 
                    plainlabels = c("unclassified", "Belgian mosquitoes"), align=F, group="Genus")+
  geom_nodepoint(aes(subset= node %in% endornacladedf$node), size=1, shape=18)+
  scale_y_reverse()+
  xlim(-.05,4)

endornaplot <- addSmallLegend(p, pointSize = 2.5, textSize = 5, spaceLegend = .5)+
  theme(legend.text = element_text(vjust = .4),
        legend.position = c(0.1,0.15))
endornaplot

ggsave(filename = here(phylo_path, "Endornaviridae/endornaviridae.pdf"), endornaplot, dpi=1200, width=6.875)

endornaplot_minimal <- plot_minimal_phylotree(endorna_tree, align=T)+
  scale_y_reverse()+
  xlim(-.05,4.5)
endornaplot_minimal

ggsave(filename = here(phylo_path, "Endornaviridae/endornaviridae_minimal.pdf"), endornaplot_minimal, dpi=1200, width=6.875)

3.3 Ghabrivirales

meta.ghabri <- do.call(rbind, lapply(list.files(path=here(phylo_path, "Toti_Chryso/"), pattern = "*.csv", full.names = T), read_csv))
ghabri.ictv <- read_csv(here(phylo_path, "Toti_Chryso/ghabrivirales_ictv.csv"))
ghabri <- read.iqtree(here(phylo_path, "Toti_Chryso/ghabrivirales.treefile"))
ghabri@phylo <- phytools::midpoint.root(ghabri@phylo)

ghabri@phylo$tip.label <- gsub(ghabri@phylo$tip.label, pattern = "_", replacement=" ")
ghabri@phylo$tip.label <- gsub(ghabri@phylo$tip.label, pattern = "P ", replacement="P_")
ghabri@phylo$tip.label <- gsub(ghabri@phylo$tip.label, pattern = "lcl\\|", replacement="lcl_")

ghabri_clean <- clean_phylometa(ghabri, metadata = meta.ghabri)
ghabri_ictv_clean <- clean_phylometa(ghabri, metadata = ghabri.ictv)

ghabri_tree <- group_phylotaxa(ghabri, ghabri_clean, group = c("Family","Genus", "Host", "Geo_Location"))
ghabri_tree
## 'treedata' S4 object that stored information
## of
##  '/Users/lander/Library/CloudStorage/OneDrive-KULeuven/Documents/Manuscripts/Belgian
## mosquitoes
## (2022)/BMV-analysis/phylogenetics/Toti_Chryso/ghabrivirales.treefile'.
## 
## ...@ phylo:
## 
## Phylogenetic tree with 105 tips and 104 internal nodes.
## 
## Tip labels:
##   YP_010085118.1|Shuangao insect-associated chrysovirus,
## QTW97792.1|Chrysoviridae sp., QRW42729.1|Hubei chryso-like virus 1,
## QRD99892.1|Soufli chryso-like virus, QRD99893.1|Xanthi chryso-like virus, lcl
## ORF1 NODE 13 length 3516 cov 325.585345 NEMO33, ...
## Node labels:
##   Root, 78, 99, 100, 100, 98, ...
## 
## Rooted; includes branch lengths.
## 
## with the following features available:
##   'SH_aLRT', 'UFboot'.
## 
## # The associated data tibble abstraction: 209 × 9
## # The 'node', 'label' and 'isTip' are from the phylo tree.
##     node label         Family   Genus  Host    Geo_Location isTip SH_aLRT UFboot
##    <int> <chr>         <fct>    <fct>  <fct>   <fct>        <lgl>   <dbl>  <dbl>
##  1     1 YP_010085118… Chrysov… Alpha… unknown Australia    TRUE       NA     NA
##  2     2 QTW97792.1|C… Chrysov… uncla… unknown China        TRUE       NA     NA
##  3     3 QRW42729.1|H… unclass… uncla… Culex … USA          TRUE       NA     NA
##  4     4 QRD99892.1|S… Chrysov… uncla… unknown Greece       TRUE       NA     NA
##  5     5 QRD99893.1|X… Chrysov… uncla… unknown Greece       TRUE       NA     NA
##  6     6 lcl ORF1 NOD… NODE     NODE   NODE    NODE         TRUE       NA     NA
##  7     7 QLJ83488.1|B… Chrysov… uncla… unknown Australia    TRUE       NA     NA
##  8     8 QRD99909.1|E… Chrysov… uncla… unknown Greece       TRUE       NA     NA
##  9     9 QRW42852.1|K… unclass… uncla… Culex … USA          TRUE       NA     NA
## 10    10 QHA33836.1|S… Chrysov… Alpha… unknown Colombia     TRUE       NA     NA
## # … with 199 more rows

Give clean metadata ictv!

ghabricladedf <- mrca_ictv(tree=ghabri_tree, meta =ghabri_ictv_clean, 
                          group="Genus", subset = c("unclassified", "Chrysovirus"))

famvec <- sort(unique(ghabri_clean$Family))
famvec
## [1] "Chrysoviridae"    "Megabirnaviridae" "Quadriviridae"    "Totiviridae"     
## [5] "unclassified"
col <- c(viridis::plasma(n = length(famvec)-1, begin=0.2, end=0.9))
names(col) <- famvec[famvec!="unclassified"]
col <- c(col, unclassified="grey", NODE="#43BF71FF")
shape <- rep(16, length(famvec))
names(shape) <- famvec
shape <- c(shape, NODE=17)

ghabri_tree@phylo$tip.label <- gsub(ghabri_tree@phylo$tip.label, pattern = "lcl ORF[0-9]* ", replacement="", perl = T)

p <- plot_phylotree(ghabri_tree, col=col, shape=shape, cladedf = ghabricladedf, labels=c(famvec, "Belgian mosquitoes"), 
                    plainlabels = c("unclassified", "Belgian mosquitoes"), align=F)+
  geom_nodepoint(aes(subset= node %in% ghabricladedf$node), size=1, shape=18)+
  scale_y_reverse()+
  xlim(-.05,4)
ghabriplot <- addSmallLegend(p, pointSize = 2.5, textSize = 5, spaceLegend = .5)+
  theme(legend.text = element_text(vjust = .4),
        legend.position = c(0.15,0.15))
ghabriplot

ggsave(filename = here(phylo_path, "Toti_Chryso/ghabrivirales.pdf"), ghabriplot, dpi=1200, width=6.875)

ghabriplot_minimal <- plot_minimal_phylotree(ghabri_tree, align=T)+
  scale_y_reverse()+
  xlim(-.05,4.5)
ghabriplot_minimal

ggsave(filename = here(phylo_path, "Toti_Chryso/ghabrivirales_minimal.pdf"), ghabriplot_minimal, dpi=1200, width=6.875)

3.4 Nodamuvirales

Rerun tree with hmv4!

meta.noda <- do.call(rbind, lapply(list.files(path=here(phylo_path, "Nodaviridae/"), pattern = "*.csv", full.names = T), read_csv))
noda.ictv <- do.call(rbind, lapply(list.files(path=here(phylo_path, "Nodaviridae/"), pattern = "*_ictv.csv", full.names = T), read_csv))
noda <- read.iqtree(here(phylo_path, "Nodaviridae/nodamuvirales.treefile"))
noda@phylo <- phytools::midpoint.root(noda@phylo)

noda@phylo$tip.label <- gsub(noda@phylo$tip.label, pattern = "_", replacement=" ")
noda@phylo$tip.label <- gsub(noda@phylo$tip.label, pattern = "P ", replacement="P_")
noda@phylo$tip.label <- gsub(noda@phylo$tip.label, pattern = "lcl\\|", replacement="lcl_")

noda_clean <- clean_phylometa(noda, metadata = meta.noda)
noda_ictv_clean <- clean_phylometa(noda, metadata = noda.ictv)

noda_tree <- group_phylotaxa(noda, noda_clean, group = c("Family","Genus", "Host", "Geo_Location"))
noda_tree
## 'treedata' S4 object that stored information
## of
##  '/Users/lander/Library/CloudStorage/OneDrive-KULeuven/Documents/Manuscripts/Belgian
## mosquitoes
## (2022)/BMV-analysis/phylogenetics/Nodaviridae/nodamuvirales.treefile'.
## 
## ...@ phylo:
## 
## Phylogenetic tree with 57 tips and 56 internal nodes.
## 
## Tip labels:
##   QIJ25871.1|Smithfield permutotetra-like virus, AXQ04818.1|Culex
## Daeseongdong-like virus, lcl ORF1 NODE 1 length 4935 cov 9340.893989 NEMO21,
## YP_009182196.1|Daeseongdong virus 2, QQD36920.1|Aedes permutotetra-like virus
## 1, QGL51734.1|Vespa velutina associated permutotetra-like virus 1, ...
## Node labels:
##   Root, 73, 46, 30, 34, 100, ...
## 
## Rooted; includes branch lengths.
## 
## with the following features available:
##   'SH_aLRT', 'UFboot'.
## 
## # The associated data tibble abstraction: 113 × 9
## # The 'node', 'label' and 'isTip' are from the phylo tree.
##     node label         Family   Genus  Host    Geo_Location isTip SH_aLRT UFboot
##    <int> <chr>         <fct>    <fct>  <fct>   <fct>        <lgl>   <dbl>  <dbl>
##  1     1 QIJ25871.1|S… unclass… uncla… Arthro… Australia    TRUE       NA     NA
##  2     2 AXQ04818.1|C… Nodavir… uncla… Culex   USA          TRUE       NA     NA
##  3     3 lcl ORF1 NOD… NODE     NODE   NODE    NODE         TRUE       NA     NA
##  4     4 YP_009182196… unclass… uncla… Culex … South Korea  TRUE       NA     NA
##  5     5 QQD36920.1|A… Permuto… uncla… unknown Brazil       TRUE       NA     NA
##  6     6 QGL51734.1|V… Permuto… uncla… Vespa … unknown      TRUE       NA     NA
##  7     7 QGL51736.1|V… Permuto… uncla… Vespa … unknown      TRUE       NA     NA
##  8     8 AKH67351.1|D… unclass… uncla… unknown Kenya        TRUE       NA     NA
##  9     9 ACU32793.1|D… Permuto… uncla… Drosop… USA          TRUE       NA     NA
## 10    10 YP_009337276… unclass… uncla… Culici… China        TRUE       NA     NA
## # … with 103 more rows

Give clean metadata ictv!

nodacladedf <- mrca_ictv(tree = noda_tree, meta = noda_ictv_clean, group = "Genus")

famvec <- sort(unique(noda_clean$Family))
col <- c(viridis::plasma(n = length(famvec)-1, begin=0.4, end=0.9))
names(col) <- famvec[famvec!="unclassified"]
col <- c(col, unclassified="grey", NODE="#43BF71FF")
shape <- rep(16, length(famvec))
names(shape) <- famvec
shape <- c(shape, NODE=17)

noda_tree@phylo$tip.label <- gsub(noda_tree@phylo$tip.label, pattern = "lcl ORF[0-9]* ", replacement="", perl = T)

p <- plot_phylotree(noda_tree, col=col, shape=shape, cladedf = nodacladedf, labels=c(famvec, "Belgian mosquitoes"), 
                    plainlabels = c("unclassified", "Belgian mosquitoes"), align=F)+
  geom_nodepoint(aes(subset= node %in% nodacladedf$node), size=1, shape=18)+
  scale_y_reverse()+
  xlim(-.05,4)

nodaplot <- addSmallLegend(p, pointSize = 2.5, textSize = 5, spaceLegend = .5)+
  theme(legend.text = element_text(vjust = .4),
        legend.position = c(0.1,0.15))
nodaplot

ggsave(filename = here(phylo_path, "Nodaviridae/nodamuvirales.pdf"), nodaplot, dpi=1200, width=6.875)

nodaplot_minimal <- plot_minimal_phylotree(noda_tree, align=T)+
  scale_y_reverse()+
  xlim(-.05,5)
nodaplot_minimal

ggsave(filename = here(phylo_path, "Nodaviridae/nodamuvirales_minimal.pdf"), nodaplot_minimal, dpi=1200, width=6.875)

3.5 Orthomyxoviridae

meta.orthomyxo <- do.call(rbind, lapply(list.files(path=here(phylo_path, "Orthomyxoviridae/"), pattern = "*.csv", full.names = T), read_csv))
orthomyxo.ictv <- do.call(rbind, lapply(list.files(path=here(phylo_path, "Orthomyxoviridae/"), pattern = "*_ictv.csv", full.names = T), read_csv))
orthomyxo <- read.iqtree(here(phylo_path, "Orthomyxoviridae/orthomyxoviridae.treefile"))
orthomyxo@phylo <- phytools::midpoint.root(orthomyxo@phylo)

orthomyxo@phylo$tip.label <- gsub(orthomyxo@phylo$tip.label, pattern = "_", replacement=" ")
orthomyxo@phylo$tip.label <- gsub(orthomyxo@phylo$tip.label, pattern = "P ", replacement="P_")
orthomyxo@phylo$tip.label <- gsub(orthomyxo@phylo$tip.label, pattern = "lcl\\|", replacement="lcl_")

orthomyxo_clean <- clean_phylometa(orthomyxo, metadata = meta.orthomyxo)
orthomyxo_ictv_clean <- clean_phylometa(orthomyxo, metadata = orthomyxo.ictv)

orthomyxo_tree <- group_phylotaxa(orthomyxo, orthomyxo_clean, group = c("Family","Genus", "Host", "Geo_Location"))
orthomyxo_tree
## 'treedata' S4 object that stored information
## of
##  '/Users/lander/Library/CloudStorage/OneDrive-KULeuven/Documents/Manuscripts/Belgian
## mosquitoes
## (2022)/BMV-analysis/phylogenetics/Orthomyxoviridae/orthomyxoviridae.treefile'.
## 
## ...@ phylo:
## 
## Phylogenetic tree with 37 tips and 36 internal nodes.
## 
## Tip labels:
##   lcl NODE 2 length 2469 cov 48.088211 MEMO067, YP_009508043.1|Quaranfil
## quaranjavirus, YP_009665204.1|Johnston Atoll quaranjavirus, YP_145804.1|Salmon
## isavirus, YP_145794.1|Thogoto thogotovirus, YP_009352882.1|Dhori thogotovirus,
## ...
## Node labels:
##   Root, , 100, 95, 100, 100, ...
## 
## Rooted; includes branch lengths.
## 
## with the following features available:
##   'SH_aLRT', 'UFboot'.
## 
## # The associated data tibble abstraction: 73 × 9
## # The 'node', 'label' and 'isTip' are from the phylo tree.
##     node label         Family   Genus   Host   Geo_Location isTip SH_aLRT UFboot
##    <int> <chr>         <fct>    <fct>   <fct>  <fct>        <lgl>   <dbl>  <dbl>
##  1     1 lcl NODE 2 l… NODE     NODE    NODE   NODE         TRUE       NA     NA
##  2     2 YP_009508043… Orthomy… Quaran… Argas… Afghanistan  TRUE       NA     NA
##  3     3 YP_009665204… Orthomy… Quaran… Ixodo… New Zealand  TRUE       NA     NA
##  4     4 YP_145804.1|… Orthomy… Isavir… unkno… Canada       TRUE       NA     NA
##  5     5 YP_145794.1|… Orthomy… Thogot… unkno… unknown      TRUE       NA     NA
##  6     6 YP_009352882… Orthomy… Thogot… unkno… unknown      TRUE       NA     NA
##  7     7 NP_056657.1|… Orthomy… Betain… unkno… unknown      TRUE       NA     NA
##  8     8 NP_040985.1|… Orthomy… Alphai… unkno… Puerto Rico  TRUE       NA     NA
##  9     9 YP_009449556… Orthomy… Deltai… Sus s… USA          TRUE       NA     NA
## 10    10 YP_089653.1|… Orthomy… Gammai… unkno… unknown      TRUE       NA     NA
## # … with 63 more rows

Give clean metadata ictv!

orthocladedf <- mrca_ictv(tree=orthomyxo_tree, meta = orthomyxo_ictv_clean, 
                          group="Genus")

famvec <- sort(unique(orthomyxo_clean$Genus))
col <- c(viridis::plasma(n = length(famvec)-1, begin=0.1, end=0.9))
names(col) <- famvec[famvec!="unclassified"]
col <- c(col, unclassified="grey", NODE="#43BF71FF")
shape <- rep(16, length(famvec))
names(shape) <- famvec
shape <- c(shape, NODE=17)

orthomyxo_tree@phylo$tip.label <- gsub(orthomyxo_tree@phylo$tip.label, pattern = "lcl [ORF]*[0-9]* *", replacement="", perl = T)

p <- plot_phylotree(orthomyxo_tree, col=col, shape=shape, cladedf = orthocladedf, labels=c(famvec, "Belgian mosquitoes"), 
                    plainlabels = c("unclassified", "Belgian mosquitoes"), align=F, group="Genus")+
  geom_nodepoint(aes(subset= node %in% orthocladedf$node), size=1, shape=18)+
  scale_y_reverse()+
  xlim(-.05,4.5)

orthoplot <- addSmallLegend(p, pointSize = 2.5, textSize = 5, spaceLegend = .5)+
  theme(legend.text = element_text(vjust = .4))
orthoplot

ggsave(filename = here(phylo_path, "Orthomyxoviridae/orthomyxoviridae.pdf"), orthoplot, dpi=1200, width=6.875)

orthoplot_minimal <- plot_minimal_phylotree(orthomyxo_tree, align=T)+
  scale_y_reverse()+
  xlim(-.05,5)
orthoplot_minimal

ggsave(filename = here(phylo_path, "Orthomyxoviridae/orthomyxoviridae_minimal.pdf"), orthoplot_minimal, dpi=1200, width=6.875)

3.6 Picornavirales

meta.picorna <- do.call(rbind, lapply(list.files(path=here(phylo_path, "Picorna/"), pattern = "*.csv", full.names = T), read_csv))
picorna.ictv <- do.call(rbind, lapply(list.files(path=here(phylo_path, "Picorna/"), pattern = "*_ictv.csv", full.names = T), read_csv))
picorna <- read.iqtree(here(phylo_path, "Picorna/picornavirales.treefile"))
picorna@phylo <- phytools::midpoint.root(picorna@phylo)

picorna@phylo$tip.label <- gsub(picorna@phylo$tip.label, pattern = "lcl\\|", replacement="lcl_")

picorna_clean <- clean_phylometa(picorna, metadata = meta.picorna)
picorna_ictv_clean <- clean_phylometa(picorna, metadata = picorna.ictv)

picorna_tree <- group_phylotaxa(picorna, picorna_clean, 
                                group = c("Family","Genus", "Host", "Geo_Location"))

Give clean metadata ictv!

picornacladedf <- mrca_ictv(tree=picorna_tree, meta = picorna_ictv_clean, 
                          group="Genus", subset="unclassified")#, subset = "Cripavirus")

famvec <- sort(unique(picorna_clean$Family))
col <- c(viridis::plasma(n = length(famvec)-1, begin=0.2, end=0.9))
names(col) <- famvec[famvec!="unclassified"]
col <- c(col, unclassified="grey", NODE="#43BF71FF")
shape <- rep(16, length(famvec))
names(shape) <- famvec
shape <- c(shape, NODE=17)

picorna_tree@phylo$tip.label <- gsub(picorna_tree@phylo$tip.label, pattern = "lcl ORF[0-9]* ", replacement="", perl = T)


p <- plot_phylotree(picorna_tree, col=col, shape=shape, cladedf = picornacladedf, labels=c(famvec, "Belgian mosquitoes"), 
                    plainlabels = c("unclassified", "Belgian mosquitoes"), align=F)+
  geom_nodepoint(aes(subset= node %in% picornacladedf$node), size=1, shape=18)+
  scale_y_reverse()+
  xlim(-.05,4)

picornaplot <- addSmallLegend(p, pointSize = 2.5, textSize = 5, spaceLegend = .5)+
  theme(legend.text = element_text(vjust = .4),
        legend.position = c(.8,.15))
picornaplot

picornaplot_minimal <- plot_minimal_phylotree(picorna_tree, align=T)+
  scale_y_reverse()+
  xlim(-.05,4.5)
picornaplot_minimal

ggsave(filename = here(phylo_path, "Picorna/picornavirales_minimal.pdf"), picornaplot_minimal, dpi=1200, width=6.875)

3.7 Reoviridae

meta.reo <- do.call(rbind, lapply(list.files(path=here(phylo_path, "Reoviridae/"), pattern = "*.csv", full.names = T), read_csv))
reo.ictv <- do.call(rbind, lapply(list.files(path=here(phylo_path, "Reoviridae/"), pattern = "*_ictv.csv", full.names = T), read_csv))
reo <- read.iqtree(here(phylo_path, "Reoviridae/reoviridae.treefile"))
reo@phylo <- phytools::midpoint.root(reo@phylo)

reo@phylo$tip.label <- gsub(reo@phylo$tip.label, pattern = "lcl\\|", replacement="lcl_")

reo_clean <- clean_phylometa(reo, metadata = meta.reo)
reo_ictv_clean <- clean_phylometa(reo, metadata = reo.ictv)

reo_tree <- group_phylotaxa(reo, reo_clean, group = c("Family","Genus", "Host", "Geo_Location"))
reo_tree@phylo$tip.label
##  [1] "AFH41509.1|Eubenangee virus"                                        
##  [2] "AGX00987.1|Wallal virus"                                            
##  [3] "AIT55713.1|Warrego virus"                                           
##  [4] "AGY34642.1|Changuinola virus"                                       
##  [5] "ACJ06234.1|Equine encephalosis virus"                               
##  [6] "AFX73387.1|Orungo virus"                                            
##  [7] "AFX73376.1|Lebombo virus"                                           
##  [8] "AAC40586.1|African horse sickness virus"                            
##  [9] "QCU80060.1|Palyam virus"                                            
## [10] "AVO64741.1|Ninarumi virus"                                          
## [11] "AEE98368.1|Umatilla virus"                                          
## [12] "BAP18631.1|Umatilla virus"                                          
## [13] "YP_002925132.1|Stretch Lagoon orbivirus"                            
## [14] "UBB38843.1|Wongorr virus"                                           
## [15] "AXF35757.1|Kammavanpettai virus"                                    
## [16] "QWF36631.1|Mudumu virus"                                            
## [17] "ANH10670.1|Parry's Lagoon virus"                                    
## [18] "QWE80471.1|Corriparta virus"                                        
## [19] "lcl ORF1 NODE 1 length 3938 cov 956.313131 NEMO25 3933 70"          
## [20] "ABB72770.1|Peruvian horse sickness virus"                           
## [21] "QNH88362.1|Peruvian horse sickness virus"                           
## [22] "AAW28767.1|Yunnan orbivirus"                                        
## [23] "BCL59280.1|Yunnan orbivirus"                                        
## [24] "UBT83574.1|Middle Point orbivirus"                                  
## [25] "BCB92306.1|Yonaguni orbivirus"                                      
## [26] "AYA60488.1|Mobuck virus"                                            
## [27] "QRW41814.1|Lobuck virus"                                            
## [28] "QCQ85356.1|CHeRI orbivirus 2-2"                                     
## [29] "BCL59290.1|Guangxi orbivirus"                                       
## [30] "ADM88592.1|Great Island virus"                                      
## [31] "ATW68806.1|Okhotskiy virus"                                         
## [32] "AHM93463.1|Great Island virus"                                      
## [33] "AYM94291.1|Great Island virus"                                      
## [34] "AKP24073.1|Wad Medani virus"                                        
## [35] "AKP24085.1|Chenuda virus"                                           
## [36] "ATW68818.1|Baku virus"                                              
## [37] "AKP24097.1|Chobar Gorge virus"                                      
## [38] "QBL15251.1|Chobar Gorge virus"                                      
## [39] "QBL15263.1|Chobar Gorge virus"                                      
## [40] "AGE32260.1|Sathuvachari virus"                                      
## [41] "AGE32270.1|Orbivirus JKT-8132"                                      
## [42] "BBE08033.1|Sathuvachari virus"                                      
## [43] "AVO64731.1|Big Cypress virus"                                       
## [44] "AZJ37597.1|Skunk River virus"                                       
## [45] "QNS17459.1|Serbia reo-like virus 2"                                 
## [46] "QVU40006.1|Amate virus"                                             
## [47] "AAG34363.1|St Croix River virus"                                    
## [48] "APG79108.1|Hubei lepidoptera virus 4"                               
## [49] "AQU42768.1|Morris orbivirus"                                        
## [50] "APG79103.1|Hubei lepidoptera virus 5"                               
## [51] "QTW97832.1|Hubei reo-like virus 12"                                 
## [52] "ASU50444.1|Anopheles annulipes orbivirus"                           
## [53] "APG79077.1|Hubei tetragnatha maxillosa virus 9"                     
## [54] "APG79144.1|Hubei odonate virus 15"                                  
## [55] "ASU43982.1|Anopheles hinesorum orbivirus"                           
## [56] "lcl ORF2 NODE 1 length 4140 cov 359.250554 MEMO015 1249 4092 pasted"
## [57] "QNS17458.1|Serbia reo-like virus 1"                                 
## [58] "DAZ85690.1|Aedes orbi-like virus"                                   
## [59] "QRW41688.1|Lasigmu virus"                                           
## [60] "QIJ70109.1|Hubei reo-like virus 14"                                 
## [61] "AVM87459.1|Wenling scaldfish reovirus"                              
## [62] "BAF49639.1|Rice gall dwarf virus"                                   
## [63] "AAB18743.1|Rice dwarf virus"                                        
## [64] "AAZ94068.1|Aedes pseudoscutellaris reovirus"                        
## [65] "ACA53380.1|Cypovirus 16"                                            
## [66] "AHJ14791.1|Cypovirus 2"                                             
## [67] "AAK73521.1|Cypovirus 1"                                             
## [68] "AAK73087.1|Cypovirus 14"                                            
## [69] "AAO73182.1|Mal de Rio Cuarto virus"                                 
## [70] "AAK40249.1|Fiji disease virus"                                      
## [71] "BAA08542.1|Nilaparvata lugens reovirus"                             
## [72] "AAG34362.1|Colorado tick fever coltivirus"                          
## [73] "ATG88075.1|Tai Forest coltivirus"                                   
## [74] "AXG65493.1|Kundal coltivirus"                                       
## [75] "BBA54735.1|Tarumizu coltivirus"                                     
## [76] "AAW29084.1|Liao ning virus"                                         
## [77] "AAF78848.1|Kadipiro virus"                                          
## [78] "AAF78849.1|Banna virus"                                             
## [79] "AKC01920.1|Eriocheir sinensis reovirus"

Give clean metadata ictv!

reocladedf <- mrca_ictv(tree=reo_tree, meta =reo_ictv_clean, 
                        group="Genus")

famvec <- sort(unique(reo_clean$Genus))
col <- c(viridis::plasma(n = length(famvec)-1, begin=0.1, end=0.9))
names(col) <- famvec[famvec!="unclassified"]
col <- c(col, unclassified="grey", NODE="#43BF71FF")
shape <- rep(16, length(famvec))
names(shape) <- famvec
shape <- c(shape, NODE=17)

reo_tree@phylo$tip.label <- gsub(reo_tree@phylo$tip.label, pattern = "lcl ORF[0-9]* ", replacement="", perl = T)
p <- plot_phylotree(reo_tree, col=col, shape=shape, cladedf = reocladedf, labels=c(famvec, "Belgian mosquitoes"), 
                    plainlabels = c("unclassified", "Belgian mosquitoes"), align=F, group="Genus")+
  geom_nodepoint(aes(subset = node %in% reocladedf$node), size=1, shape=18)+
  scale_y_reverse()+
  xlim(-.05,7)
reoplot <- addSmallLegend(p, pointSize = 2.5, textSize = 5, spaceLegend = .5)+
  theme(legend.text = element_text(vjust = .4),
        legend.position = c(0.7,.3))
reoplot

ggsave(filename = here(phylo_path, "Reoviridae/reoviridae.pdf"), reoplot, dpi=1200, width=6.875)

reoplot_minimal <- plot_minimal_phylotree(reo_tree, align=T)+
  scale_y_reverse()+
  xlim(-.05,9)
reoplot_minimal

ggsave(filename = here(phylo_path, "Reoviridae/reoviridae_minimal.pdf"), reoplot_minimal, dpi=1200, width=6.875)

3.8 Tombusviridae

meta.tombus <- do.call(rbind, lapply(list.files(path=here(phylo_path, "Tombusviridae/"), pattern = "*.csv", full.names = T), read_csv))
tombus.ictv <- do.call(rbind, lapply(list.files(path=here(phylo_path, "Tombusviridae/"), pattern = "*_ictv.csv", full.names = T), read_csv))
tombus <- read.iqtree(here(phylo_path, "Tombusviridae/tombusviridae2.treefile"))
tombus@phylo <- phytools::midpoint.root(tombus@phylo)

tombus@phylo$tip.label <- gsub(tombus@phylo$tip.label, pattern = "lcl\\|", replacement="lcl_")

tombus_clean <- clean_phylometa(tombus, metadata = meta.tombus)
tombus_ictv_clean <- clean_phylometa(tombus, metadata = tombus.ictv)

tombus_tree <- group_phylotaxa(tombus, tombus_clean, 
                               group = c("Family","Genus", "Host", "Geo_Location"))

Give clean metadata ictv!

tombuscladedf <- mrca_ictv(tree = tombus_tree, meta = tombus_ictv_clean, 
                          group="Genus")

famvec <- sort(unique(tombus_clean$Genus))

col <- c(viridis::plasma(n = length(famvec)-1, begin=0.1, end=0.9))
names(col) <- famvec[famvec!="unclassified"]
col <- c(col, unclassified="grey", NODE="#43BF71FF")
shape <- rep(16, length(famvec))
names(shape) <- famvec
shape <- c(shape, NODE=17)

tombus_tree@phylo$tip.label <- gsub(tombus_tree@phylo$tip.label, pattern = "lcl ORF[0-9]* ", replacement="", perl = T)


p <- plot_phylotree(tombus_tree, col=col, shape=shape, cladedf = tombuscladedf,
                    labels=c(famvec, "Belgian mosquitoes"), 
                    plainlabels = c("unclassified", "Belgian mosquitoes"), align=F,
                    group="Genus")+
  geom_nodepoint(aes(subset= node %in% tombuscladedf$node), size=1, shape=18)+
  scale_y_reverse()+
  xlim(-.05,3.5)
tombusplot <- addSmallLegend(p, pointSize = 2.5, textSize = 5, spaceLegend = .5)+
  theme(legend.text = element_text(vjust = .4),
        legend.position = c(0.1,.2))
tombusplot

ggsave(filename = here(phylo_path, "Tombusviridae/tombusviridae.pdf"), tombusplot, dpi=1200, width=6.875)

tombusplot_minimal <- plot_minimal_phylotree(tombus_tree, align=T)+
  scale_y_reverse()+
  xlim(-.05,5)
tombusplot_minimal

ggsave(filename = here(phylo_path, "Tombusviridae/tombusviridae_minimal.pdf"), tombusplot_minimal, dpi=1200, width=6.875)

3.9 Negevirus

meta.negev <- do.call(rbind, lapply(list.files(path=here(phylo_path, "Negevirus/"), pattern = "*.csv", full.names = T), read_csv))
#tombus.ictv <- do.call(rbind, lapply(list.files(path=here(phylo_path, "Tombusviridae/"), pattern = "*_ictv.csv", full.names = T), read_csv))
negev <- read.iqtree(here(phylo_path, "Negevirus/negeviruses.treefile"))
negev@phylo <- phytools::midpoint.root(negev@phylo)

negev@phylo$tip.label <- gsub(negev@phylo$tip.label, pattern = "lcl\\|", replacement="lcl_")

negev_clean <- clean_phylometa_negev(negev, metadata = meta.negev)

#negev_ictv_clean <- clean_phylometa(negev, metadata = negev.ictv)

negev_tree <- group_phylotaxa(negev, negev_clean, 
                               group = c("Family","Genus", "Host", "Geo_Location"))

Give clean metadata ictv!

# negevcladedf <- mrca_ictv(tree = negev_tree, meta = negev_ictv_clean, 
#                            group="Genus")

famvec <- sort(unique(negev_clean$Genus))

col <- c(viridis::plasma(n = length(famvec)-1, begin=0.1, end=0.9))
names(col) <- famvec[famvec!="unclassified"]
col <- c(col, unclassified="grey", NODE="#43BF71FF")
shape <- rep(16, length(famvec))
names(shape) <- famvec
shape <- c(shape, NODE=17)

negev_tree@phylo$tip.label <- gsub(negev_tree@phylo$tip.label, pattern = "lcl ORF[0-9]* ", replacement="", perl = T)

p <- plot_phylotree(negev_tree, col=col, shape=shape,
                    labels=c(famvec, "Belgian mosquitoes"), 
                    plainlabels = c("unclassified", "Belgian mosquitoes"), align=F,
                    group="Genus")+
  #geom_nodepoint(aes(subset= node %in% negevcladedf$node), size=1, shape=18)+
  scale_y_reverse()+
  xlim(-.05,3.5)

negevplot <- addSmallLegend(p, pointSize = 2.5, textSize = 5, spaceLegend = .5)+
  theme(legend.text = element_text(vjust = .4),
        legend.position = c(0.2,.2))
negevplot

ggsave(filename = here(phylo_path, "Negevirus/negeviruses.pdf"), negevplot, dpi=1200, width=6.875)

negevplot_minimal <- plot_minimal_phylotree(negev_tree, align=T)+
  scale_y_reverse()+
  xlim(-.05,5)
negevplot_minimal

ggsave(filename = here(phylo_path, "Negevirus/negeviruses_minimal.pdf"), tombusplot_minimal, dpi=1200, width=6.875)

3.10 Combine phylogenetic trees

# p1 <- ggarrange(endornaplot, orthoplot,
#                 bunyaplot, picornaplot,
#                 ghabriplot, reoplot,
#                 ncol=2, nrow=3, labels="AUTO",
#                 heights = c(.2,.4,.4))

# p1 <- ggarrange(endornaplot, orthoplot,
#                 bunyaplot, picornaplot,
#                 ghabriplot, reoplot,
#                 tombusplot, negevplot,
#                 ncol=2, nrow=4, labels="AUTO",
#                 heights = c(.15,.25,.25,.25))
# p1

p1 <- ggarrange(negevplot, endornaplot, orthoplot,
                picornaplot, bunyaplot, 
                ghabriplot, #reoplot,
                #tombusplot, negevplot,
                ncol=3, nrow=2, labels="AUTO",
                heights = c(.4,.6))
# ggarrange(p1, 
#           ggarrange(NULL, tombusplot+theme(legend.position = "right"), NULL, 
#                     widths=c(0.1,0.8,0.1), labels=c("","G",""), ncol=3), 
#           ncol=1, heights = c(.75, .25))
ggarrange(p1, 
          ggarrange(reoplot, tombusplot,#+theme(legend.position = "right"), 
                    widths=c(0.5,0.5), labels=c("G","H"), ncol=2), 
          ncol=1, heights = c(.7, .3))

#ggsave("figures/treeplots.pdf", width=17, height = 28, units = "in", dpi=300)
ggsave("figures/treeplots.pdf", width=20, height = 20, units = "in", dpi=600)
ggsave("figures/treeplots_small.pdf", width=6.875, height = 9.0625, units = "in", dpi=1200)

Minimal phylogenetic trees:

# p2 <- ggarrange(endornaplot_minimal, orthoplot_minimal,
#                 bunyaplot_minimal, picornaplot_minimal,
#                 ghabriplot_minimal, reoplot_minimal,
#                 ncol=2, nrow=3, labels="AUTO",
#                 heights = c(.2,.4,.4))

p2 <- ggarrange(endornaplot_minimal, orthoplot_minimal, negevplot_minimal,
                bunyaplot_minimal, picornaplot_minimal, ghabriplot_minimal,
                ncol=3, nrow=2, labels="AUTO",
                heights = c(.4,.6))
# ggarrange(p2, 
#           ggarrange(NULL, tombusplot_minimal, NULL, 
#                     widths=c(0.1,0.8,0.1), labels=c("","G",""), ncol=3), 
#           ncol=1, heights = c(.75, .25))
ggarrange(p2, 
          ggarrange(reoplot_minimal, tombusplot_minimal, 
                    widths=c(0.5,0.5), labels=c("G","H"), ncol=2), 
          ncol=1, heights = c(.7, .3))

#ggsave("figures/treeplots_minimal.pdf", width=17, height = 28, units = "in", dpi=300)
ggsave("figures/treeplots_minimal.pdf", width=20, height = 20, units = "in", dpi=600)
ggsave("figures/treeplots_minimal_small.pdf", width=6.875, height = 9.0625, units = "in", dpi=600)

4 Phageome & Wolbachia bacteria

library(ggbeeswarm)
phage <- read.table("data/wolbachia_phage_ratio.tsv", sep="\t", header = T)
phage <- merge(phage, meta, by="Sample")
ggplot(phage, aes(x = SKA_Subspecies, y= ratio, color=ratio>4))+
  geom_beeswarm(cex=2)+
  scale_color_brewer(palette = "Set2", direction = -1)+
  theme_bw()+
  theme(axis.title.x = element_blank(),
        axis.text.x = element_text(face="italic"))

phageWO_real <- phage[phage$ratio>4,]$Sample

4.1 Wolbachia

Wolbachia bwa-mem2 mapped

wMel <- read.table("data/wMel.txt", header=F, dec=".", sep=" ")
wPip <- read.table("data/wPip.txt", header=F, dec=".", sep=" ")
phageWO <- read.table("data/phageWO.txt", header=F, dec=".", sep=" ")
colnames(wMel) <- c("Sample", "numreads", "covbases", "coverage", "meandepth")
colnames(wPip) <- c("Sample", "numreads", "covbases", "coverage", "meandepth")
colnames(phageWO) <- c("Sample", "supergroup", "numreads", "covbases", "coverage", "meandepth")
wPip$supergroup <- "wPip"
wMel$supergroup <- "wMel"

wolb <- rbind(wMel, wPip, phageWO)

wolb <- wolb %>% 
  plyr::mutate(supergroup=str_replace(string=supergroup, pattern = "NODE_12_length_11674_cov_94.907217_MEMO129", replacement = "phageWO2")) %>% 
  plyr::mutate(supergroup=str_replace(string=supergroup, pattern = "NODE_1_length_31159_cov_25.624799_MEMO050", replacement = "phageWO1"))
wolb$numreads[wolb$coverage<5] <- 0
wolb$meandepth[wolb$coverage<5] <- 0

wviolin <- pivot_wider(wolb, id_cols = "Sample", names_from = "supergroup", values_from = "numreads") %>% 
  ggplot(aes(x = meta[meta$Sample!="NEMO39",]$SKA_Subspecies, y=wPip))+
  geom_boxplot(aes(color=meta[meta$Sample!="NEMO39",]$SKA_Subspecies), width=0.1, outlier.shape = NA)+
  geom_violin(aes(fill=meta[meta$Sample!="NEMO39",]$SKA_Subspecies), color=NA, show.legend = F, trim = T)+
  geom_jitter(aes(color=meta[meta$Sample!="NEMO39",]$SKA_Subspecies), size=1, alpha=0.6,
              position=position_jitter(w=0.1,h=0)) +
  theme_bw()+
  theme(axis.text.x = element_blank(),
        panel.grid.major = element_blank(),
        axis.ticks.x = element_blank())+
  xlab(label = "")+
  ylab(label = "Wolbachia read count")+
  scale_fill_manual(values=alpha(myColors, 0.1), name='')+
  scale_color_viridis_d(begin=0.4, end =0.9, name="")+ 
  guides(fill = guide_legend(#override.aes = list(alpha = 1), 
    label.theme = element_text(size=10, angle = 0, face = "italic")))+
  scale_y_continuous(trans=scales::pseudo_log_trans(base = 10), breaks = c(0, 10^seq(2,8,2)))
wviolin

ggsave("figures/wolbachia_readcount.pdf", dpi=300)

barplot

wb <-  pivot_wider(wolb, id_cols = "Sample", names_from = "supergroup", values_from = "numreads") %>% 
  ggplot(aes(x = Sample, y=wPip))+
  geom_col(aes(fill=meta[meta$Sample!="NEMO39",]$SKA_Subspecies))+
  theme_bw()+
  theme(axis.text.x = element_blank(),
        panel.grid.major = element_blank())+
  scale_y_continuous(expand=expansion(mult = c(0, 0.01), add = c(0, 0.1)))+
  xlab(label = "Sample")+
  ylab(label = "Wolbachia read count")+
  labs(shape="Virus", color="Virus")+
  scale_fill_viridis_d(begin=0.4, end =0.9, name="")+ 
  guides(fill = guide_legend(override.aes = list(alpha = 1), 
                             label.theme = element_text(size=10, angle = 0, face = "italic")))+
  facet_grid(~meta[meta$Sample!="NEMO39",]$Municipality, scales = "free", space = "free")
wb

4.2 Phageome

phageotu<-read.table("data/BEmosq_classification-85-taxfile-1000nt.tsv", sep="\t", header = T)
phage<-phageotu[phageotu$Phylum == "Phage",]
rownames(phage)<- phage$Contig
phage <- subset(phage, select=-Contig)

newtax<-tax[!rownames(tax) %in% rownames(phage),]

newtax <- rbind(newtax, phage)

tax_table(BMV) <- as.matrix(newtax) #Maybe better solution

#newtax.UF <- tax_table(as.matrix(newtax))

#BMV <- phyloseq(OTU.UF, newtax.UF, meta.UF)

BMV.V <- subset_taxa(BMV, Kingdom=="Viruses")

phageome <- subset_taxa(BMV.V, Phylum=="Phage" | Order=="Caudovirales" | Family=="Inoviridae")
phageome
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 41 taxa and 197 samples ]
## sample_data() Sample Data:       [ 197 samples by 11 sample variables ]
## tax_table()   Taxonomy Table:    [ 41 taxa by 7 taxonomic ranks ]
otu_table(phageome) <- as.data.frame(otu_table(phageome)) %>% 
  mutate(otu=rownames(.)) %>% 
  pivot_longer(cols=1:197, names_to = "Sample", values_to = "abundance") %>% 
  mutate(abundance=if_else(otu %in% c("NODE_1_length_31159_cov_25.624799_MEMO050|full", 
                                            "NODE_12_length_11674_cov_94.907217_MEMO129|full") 
                                 & Sample != phageWO_real, 0, as.double(abundance))) %>% 
  pivot_wider(names_from = Sample, values_from = abundance) %>% 
  column_to_rownames('otu') %>% 
  otu_table(taxa_are_rows = T)

#sample_data(phageome) <- data.frame(wv.meta, row.names = 1) 
#sample_data(phageome)

phage_pruned <- prune_samples(sample_sums(phageome)>0, phageome)
phage_pruned <- prune_taxa(taxa_sums(phage_pruned)>0, phage_pruned)
phage_pruned
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 30 taxa and 5 samples ]
## sample_data() Sample Data:       [ 5 samples by 11 sample variables ]
## tax_table()   Taxonomy Table:    [ 30 taxa by 7 taxonomic ranks ]
pp <- as.data.frame(otu_table(phage_pruned))

p.merge <- merge_taxa(phage_pruned, 1:2, 2)
p.merge <- merge_taxa(p.merge, 2:3, 2)
p.merge <- merge_taxa(p.merge, 3:25, 2)
p.merge <- merge_taxa(p.merge, 4:5, 2)
tax_table(p.merge)
## Taxonomy Table:     [5 taxa by 7 taxonomic ranks]:
##                                                 Kingdom   Phylum       
## NODE_12_length_11674_cov_94.907217_MEMO129|full "Viruses" "Phage"      
## NODE_4_length_5754_cov_10.923727_MEMO048|full   "Viruses" NA           
## NODE_2_length_9043_cov_21.187821_MEMO141|full   "Viruses" NA           
## NODE_17_length_1194_cov_3.745748_MEMO017|full   "Viruses" "Uroviricota"
## NODE_1_length_5000_cov_44.805403_NEMO08|full    "Viruses" "Phage"      
##                                                 Class            Order         
## NODE_12_length_11674_cov_94.907217_MEMO129|full "unclassified"   "unclassified"
## NODE_4_length_5754_cov_10.923727_MEMO048|full   NA               NA            
## NODE_2_length_9043_cov_21.187821_MEMO141|full   NA               NA            
## NODE_17_length_1194_cov_3.745748_MEMO017|full   "Caudoviricetes" "Caudovirales"
## NODE_1_length_5000_cov_44.805403_NEMO08|full    "unclassified"   "unclassified"
##                                                 Family         Genus         
## NODE_12_length_11674_cov_94.907217_MEMO129|full "unclassified" "unclassified"
## NODE_4_length_5754_cov_10.923727_MEMO048|full   NA             NA            
## NODE_2_length_9043_cov_21.187821_MEMO141|full   NA             NA            
## NODE_17_length_1194_cov_3.745748_MEMO017|full   "Siphoviridae" "unclassified"
## NODE_1_length_5000_cov_44.805403_NEMO08|full    "unclassified" "unclassified"
##                                                 Species                       
## NODE_12_length_11674_cov_94.907217_MEMO129|full "phage WO"                    
## NODE_4_length_5754_cov_10.923727_MEMO048|full   NA                            
## NODE_2_length_9043_cov_21.187821_MEMO141|full   NA                            
## NODE_17_length_1194_cov_3.745748_MEMO017|full   "Erwinia phage Midgardsormr38"
## NODE_1_length_5000_cov_44.805403_NEMO08|full    "unclassified"
otu_table(p.merge)
## OTU Table:          [5 taxa and 5 samples]
##                      taxa are rows
##                                                 MEMO017 MEMO043 MEMO048 MEMO141
## NODE_12_length_11674_cov_94.907217_MEMO129|full       0    6096       0       0
## NODE_4_length_5754_cov_10.923727_MEMO048|full         0       0    3945       0
## NODE_2_length_9043_cov_21.187821_MEMO141|full         0       0       0   17529
## NODE_17_length_1194_cov_3.745748_MEMO017|full       561       0       0       0
## NODE_1_length_5000_cov_44.805403_NEMO08|full          0       0       0       0
##                                                 NEMO08
## NODE_12_length_11674_cov_94.907217_MEMO129|full      0
## NODE_4_length_5754_cov_10.923727_MEMO048|full        0
## NODE_2_length_9043_cov_21.187821_MEMO141|full        0
## NODE_17_length_1194_cov_3.745748_MEMO017|full        0
## NODE_1_length_5000_cov_44.805403_NEMO08|full      7751
plot_heatmap(p.merge, taxa.label = "Species", low ="#FFF7B8", high = "#800026", na.value = "#FFFFCC")+
  theme_bw()+
  theme(axis.title = element_blank())

phage_tax <- as.data.frame(tax_table(p.merge))
phage_tax <- replace_na(phage_tax, list(Phylum="Uroviricota", Class="Caudoviricetes", Order="Caudovirales", 
                                        Family="unclassified", Genus="unclassified", Species="unclassified"))
tax_table(p.merge) <- as.matrix(phage_tax)